Polynomial Regression¶

We will use a polynomial regression model to predict the values of sine(x)

In [1]:
# importing required libraries
import numpy as np #for processing arrays
import pandas as pd #for creating and processing DataFrames
import matplotlib.pyplot as plt #for creating plots
import seaborn as sns #for creating plots on DataFrames
import plotly.express as px #for creating interactive plots
import math #for converting degrees to radians and computing the sine of an angle
In [2]:
# Three independent, initially empty columns of the sine table:
# angles in degrees, the same angles in radians, and sin(angle).
deg, rad, sine = [], [], []
In [3]:
# Build the three columns from scratch inside this cell so that re-running it
# never appends a second copy of every row to lists created in an earlier cell.
# 1 degree = pi/180 radians
deg = list(range(0, 361))  # whole-degree angles 0..360 inclusive
# radians rounded to 7 decimal places
rad = [round(math.radians(angle), 7) for angle in deg]
# sine is computed from the already-rounded radian value, then rounded to 7 places
sine = [round(math.sin(angle_in_radians), 7) for angle_in_radians in rad]
In [4]:
# Column name -> column values, listed in the order the columns should appear
sine_data_dictionary = dict(
    angle_in_degrees=deg,
    angle_in_radians=rad,
    sine_of_angle=sine,
)
In [5]:
# One row per angle, one column per dictionary key
sine_dataframe = pd.DataFrame(data=sine_data_dictionary)
In [6]:
# Show the assembled table; the rendered output abbreviates the 361 rows to the first and last few
sine_dataframe
Out[6]:
angle_in_degrees angle_in_radians sine_of_angle
0 0 0.000000 0.000000
1 1 0.017453 0.017452
2 2 0.034907 0.034899
3 3 0.052360 0.052336
4 4 0.069813 0.069756
... ... ... ...
356 356 6.213372 -0.069756
357 357 6.230825 -0.052336
358 358 6.248279 -0.034899
359 359 6.265732 -0.017452
360 360 6.283185 -0.000000

361 rows × 3 columns

In [7]:
# Interactive line chart of sin(x) against x in radians; hovering a point also shows the angle in degrees
px.line(
    data_frame=sine_dataframe,
    x='angle_in_radians',
    y='sine_of_angle',
    hover_data=['angle_in_degrees'],
    title='Sinusoidal Curve',
)

Using LinearRegression model to predict values of sin(angle)¶

  • Predictions will be made using a linear equation, $y = mx + c$, where the model estimates the slope $m$ and the intercept $c$ from the data, and $y$ is the predicted value of $\sin(x)$.
In [8]:
from sklearn.linear_model import LinearRegression
In [9]:
# Inputs are kept as a one-column DataFrame (2-D); the target stays a 1-D Series
X = sine_dataframe['angle_in_radians'].to_frame()  # inputs
y = sine_dataframe.loc[:, 'sine_of_angle']  # target
In [22]:
# Fit a straight line (y = mx + c) to the sine data
model = LinearRegression()
model.fit(X=X, y=y)
Out[22]:
LinearRegression()
In [11]:
# The straight-line model's estimate of sin(x) for every angle it was trained on
predictions = model.predict(X=X)
In [12]:
# Apply the darkgrid style and the notebook-wide figure size in a single call
# (a separate set_style() call placed before sns.set() is overridden by it anyway).
sns.set(style='darkgrid', rc={'figure.figsize': (11.7, 8.27)})

# Draw both curves on one explicit Axes and label them, so the figure can be
# read on its own instead of relying on a code comment to explain the colours.
fig, ax = plt.subplots()
sns.lineplot(x='angle_in_radians', y='sine_of_angle', data=sine_dataframe,
             label='sin(x) (actual)', ax=ax)
ax.plot(sine_dataframe['angle_in_radians'], predictions,
        label='Linear regression prediction')
ax.set(title='Linear regression fit to sin(x)',
       xlabel='Angle (radians)', ylabel='sin(angle)')
ax.legend();

Using Polynomial Regression to predict the values of sine of x¶

  • In polynomial regression, a curve is used to predict values instead of a straight line
In [13]:
#using a polynomial regression model
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
# LinearRegression is reused from the import in the linear-regression section above

#polynomial of degree 4
# The degree passed to PolynomialFeatures now matches this comment and the
# 'predictions_deg_4' column that reports this model's results further down.
model_pr = make_pipeline(PolynomialFeatures(degree=4),LinearRegression())
model_pr.fit(X,y)
polynomial_predictions = model_pr.predict(X)
In [14]:
sns.set_style('darkgrid')

# Three curves share this figure, so each one is labelled and a legend is drawn.
fig, ax = plt.subplots()
sns.lineplot(x='angle_in_radians', y='sine_of_angle', data=sine_dataframe,
             label='sin(x) (actual)', ax=ax)
ax.plot(sine_dataframe['angle_in_radians'], predictions,
        label='Linear regression (model)')
ax.plot(sine_dataframe['angle_in_radians'], polynomial_predictions,
        label='Polynomial regression (model_pr)')
ax.set(title='Linear vs. polynomial regression fit to sin(x)',
       xlabel='Angle (radians)', ylabel='sin(angle)')
ax.legend();
In [15]:
# Degree-5 polynomial features feeding a linear model; fit() returns the fitted pipeline itself
model_pr_deg_5 = make_pipeline(
    PolynomialFeatures(degree=5),
    LinearRegression(),
).fit(X, y)
polynomial_predictions_deg_5 = model_pr_deg_5.predict(X)
In [16]:
sns.set_style('darkgrid')

# Four curves share this figure, so each one is labelled and a legend is drawn.
fig, ax = plt.subplots()
sns.lineplot(x='angle_in_radians', y='sine_of_angle', data=sine_dataframe,
             label='sin(x) (actual)', ax=ax)
ax.plot(sine_dataframe['angle_in_radians'], predictions,
        label='Linear regression (model)')
ax.plot(sine_dataframe['angle_in_radians'], polynomial_predictions,
        label='Polynomial regression (model_pr)')
# red dashed line so the degree-5 fit stays visible where it lies on top of the sine curve
ax.plot(sine_dataframe['angle_in_radians'], polynomial_predictions_deg_5, 'r--',
        label='Polynomial regression, degree 5 (model_pr_deg_5)')
ax.set(title='Regression fits to sin(x)',
       xlabel='Angle (radians)', ylabel='sin(angle)')
ax.legend();
In [17]:
# The curve produced by the degree-5 polynomial (red dashed line) overlaps the sine curve.
# Hence model_pr_deg_5 is the most accurate of the three models and can be used to predict sin(x).
In [18]:
# Pick out the rows for 30, 45 and 60 degrees to spot-check the models on them
input_data = sine_dataframe.loc[sine_dataframe['angle_in_degrees'].isin([30, 45, 60])]
input_data
Out[18]:
angle_in_degrees angle_in_radians sine_of_angle
30 30 0.523599 0.500000
45 45 0.785398 0.707107
60 60 1.047198 0.866025
In [19]:
# True sine values next to each model's prediction for the selected angles.
# Every model receives the same one-column frame of radians as input.
result_df = pd.DataFrame({
    'sine_of_angle': input_data['sine_of_angle'],
    **{
        column_name: estimator.predict(input_data[['angle_in_radians']])
        for column_name, estimator in (
            ('predictions_deg_5', model_pr_deg_5),
            ('predictions_deg_4', model_pr),
            ('predictions_deg_1', model),
        )
    },
})
In [20]:
# Side-by-side view of the true sine values and each model's predictions
result_df
Out[20]:
sine_of_angle predictions_deg_5 predictions_deg_4 predictions_deg_1
30 0.500000 0.494111 0.570043 0.789166
45 0.707107 0.706452 0.798614 0.710249
60 0.866025 0.870038 0.937003 0.631333
In [21]:
# Of the three models compared, the degree-5 polynomial regression gives the predictions closest to the true sine values